Unbalanced panel of 531 NBA players from 2013 to 2018. Outcome: Value Over Replacement Player (VORP); 148 features before preprocessing.
# Pairwise correlation matrix of the numeric features
cor_num <- cor(df_num_mean)
# Column indices that findCorrelation() suggests removing because they are
# highly correlated (cutoff 0.95) with other features
cor_high <- findCorrelation(cor_num, cutoff = 0.95)
# Names of the features flagged for removal due to collinearity.
# The name vector is indexed directly instead of subsetting the data first:
# df[, i] with a single index drops to a bare vector, for which names() is
# NULL, so a lone flagged feature would otherwise go unreported.
colnames(cor_num)[cor_high]
## [1] "FGA3_t1" "FTA_t1" "TRB_t1" "PSG_t1" "FGA3_t2"
## [6] "FTA_t2" "TRB_t2" "PSG_t2" "FGA3_t3" "FTA_t3"
## [11] "TRB_t3" "PSG_t3" "TRBPer_t1" "WS_t1" "TRBPer_t2"
## [16] "FG_t1" "FG2_t1" "Age_t1" "FG_t2" "FG2_t2"
## [21] "Age_t2" "FG_t3" "FG2_t3"
# Per-column summary statistics of the training feature set
skim(x_mean_train)
## Skim summary statistics
## n obs: 988
## n variables: 121
##
## ── Variable type:factor ─────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## Pos_t1 0 988 988 5
## Pos_t2 0 988 988 5
## Pos_t3 0 988 988 5
## Tm_t1 0 988 988 31
## Tm_t2 0 988 988 31
## Tm_t3 0 988 988 31
## top_counts ordered
## PF: 223, SG: 210, PG: 192, C: 183 FALSE
## PF: 228, SG: 220, PG: 193, SF: 175 FALSE
## PF: 227, SG: 217, PG: 196, SF: 177 FALSE
## TOT: 107, GSW: 36, TOR: 36, CHI: 35 FALSE
## TOT: 86, HOU: 35, TOR: 35, BOS: 34 FALSE
## TOT: 98, UTA: 37, DEN: 35, HOU: 35 FALSE
##
## ── Variable type:numeric ────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75
## Age_t3 0 988 988 24.18 4.19 17 21 24 27
## Ar3P_t1 0 988 988 0.29 0.21 0 0.099 0.3 0.44
## Ar3P_t2 0 988 988 0.27 0.19 0 0.12 0.27 0.39
## Ar3P_t3 0 988 988 0.25 0.17 0 0.16 0.25 0.33
## AST_t1 0 988 988 2.03 1.84 0 0.8 1.4 2.6
## AST_t2 0 988 988 2.19 1.77 0 1 2 2.4
## AST_t3 0 988 988 2.3 1.7 0 1.1 2.28 2.3
## ASTPer_t1 0 988 988 13.43 9.29 0 7.1 10.35 17.9
## ASTPer_t2 0 988 988 13.75 8.75 0 7.8 12.35 15.83
## ASTPer_t3 0 988 988 14.02 8.15 0 8.5 13.99 14.53
## BLK_t1 0 988 988 0.45 0.45 0 0.2 0.3 0.6
## BLK_t2 0 988 988 0.47 0.42 0 0.2 0.4 0.5
## BLK_t3 0 988 988 0.5 0.41 0 0.2 0.5 0.5
## BLKPer_t1 0 988 988 1.69 1.64 0 0.6 1.2 2.23
## BLKPer_t2 0 988 988 1.68 1.52 0 0.7 1.4 1.8
## BLKPer_t3 0 988 988 1.68 1.3 0 0.8 1.69 1.69
## BPM_t1 0 988 988 -0.79 3.28 -20.8 -2.6 -0.8 0.9
## BPM_t2 0 988 988 -0.5 2.83 -18.9 -1.72 -0.51 0.8
## BPM_t3 0 988 988 -0.35 2.7 -20.1 -1.2 -0.4 0.6
## DBPM_t1 0 988 988 -0.13 1.9 -10.2 -1.3 -0.1 1
## DBPM_t2 0 988 988 -0.071 1.64 -8.2 -1.1 -0.066 0.8
## DBPM_t3 0 988 988 -0.077 1.55 -13.2 -0.7 -0.094 0.6
## DRB_t1 0 988 988 3.01 1.76 0 1.78 2.7 3.9
## DRB_t2 0 988 988 3.18 1.61 0 2.1 3.18 3.7
## DRB_t3 0 988 988 3.21 1.5 0 2.3 3.21 3.5
## DRBPer_t1 0 988 988 15.32 6.86 0 10.5 14.1 19.3
## DRBPer_t2 0 988 988 15.15 5.61 0 10.97 15.17 17.9
## DRBPer_t3 0 988 988 14.93 5.06 0 11.5 14.98 16.7
## DWS_t1 0 988 988 1.46 1.12 -0.1 0.6 1.3 2.1
## DWS_t2 0 988 988 1.62 1.07 -0.1 0.9 1.62 2.1
## DWS_t3 0 988 988 1.73 1.03 -0.1 1.1 1.7 2.1
## eFG_t1 0 988 988 0.5 0.072 0 0.47 0.5 0.54
## eFG_t2 0 988 988 0.5 0.057 0 0.47 0.5 0.53
## eFG_t3 0 988 988 0.5 0.047 0.22 0.48 0.5 0.51
## FG2Per_t1 0 988 988 0.49 0.081 0 0.45 0.48 0.53
## FG2Per_t2 0 988 988 0.48 0.065 0 0.46 0.48 0.51
## FG2Per_t3 0 988 988 0.48 0.055 0 0.46 0.48 0.5
## FG3_t1 0 988 988 0.8 0.76 0 0.1 0.6 1.3
## FG3_t2 0 988 988 0.78 0.68 0 0.2 0.77 1.1
## FG3_t3 0 988 988 0.77 0.63 0 0.3 0.76 1
## FG3Per_t1 0 988 988 0.31 0.12 0 0.29 0.33 0.37
## FG3Per_t2 0 988 988 0.31 0.12 0 0.3 0.31 0.36
## FG3Per_t3 0 988 988 0.3 0.11 0 0.3 0.3 0.36
## FGA_t1 0 988 988 7.77 4.47 0 4.47 6.8 10.5
## FGA_t2 0 988 988 8.27 4 0 5.3 8.25 10.2
## FGA_t3 0 988 988 8.61 3.67 1 6.4 8.55 10
## FGA2_t1 0 988 988 5.53 3.58 0 2.9 4.7 7.8
## FGA2_t2 0 988 988 6.08 3.35 0 3.7 6.08 7.62
## FGA2_t3 0 988 988 6.45 3.14 0.2 4.5 6.43 7.3
## FGPer_t1 0 988 988 0.45 0.079 0 0.41 0.44 0.49
## FGPer_t2 0 988 988 0.45 0.063 0 0.42 0.45 0.47
## FGPer_t3 0 988 988 0.45 0.052 0.22 0.43 0.45 0.47
## FT_t1 0 988 988 1.62 1.43 0 0.7 1.2 2.1
## FT_t2 0 988 988 1.76 1.29 0 0.9 1.7 2
## FT_t3 0 988 988 1.85 1.2 0 1.1 1.84 1.9
## FTPer_t1 0 988 988 0.74 0.13 0 0.68 0.76 0.82
## FTPer_t2 0 988 988 0.74 0.11 0 0.71 0.74 0.81
## FTPer_t3 0 988 988 0.75 0.093 0 0.73 0.75 0.8
## FTr_t1 0 988 988 0.27 0.15 0 0.18 0.25 0.35
## FTr_t2 0 988 988 0.28 0.13 0 0.2 0.28 0.32
## FTr_t3 0 988 988 0.28 0.11 0 0.22 0.28 0.3
## G_t1 0 988 988 59.67 21.28 1 47 67 76
## G_t2 0 988 988 62.93 17.36 1 59 64 76
## G_t3 0 988 988 64.6 15.41 1 64 64.22 76
## GS_t1 0 988 988 30.28 29.53 0 2 19 60
## GS_t2 0 988 988 34.11 27.43 0 7 34.03 59.25
## GS_t3 0 988 988 37.02 25.61 0 15 36.5 58
## MP_t1 0 988 988 22.08 8.67 0.7 15.67 22.2 29.3
## MP_t2 0 988 988 23.46 7.72 2 18.67 23.42 29.42
## MP_t3 0 988 988 24.32 7.2 2.5 21.48 24.22 28.83
## OBPM_t1 0 988 988 -0.66 2.77 -16.4 -2 -0.6 0.6
## OBPM_t2 0 988 988 -0.43 2.41 -14.3 -1.5 -0.45 0.6
## OBPM_t3 0 988 988 -0.27 2.28 -16.7 -1.1 -0.3 0.5
## ORB_t1 0 988 988 0.97 0.81 0 0.4 0.7 1.4
## ORB_t2 0 988 988 1.05 0.77 0 0.5 1 1.3
## ORB_t3 0 988 988 1.12 0.76 0 0.6 1.12 1.2
## ORBPer_t1 0 988 988 5.12 4 0 2 3.6 7.82
## ORBPer_t2 0 988 988 5.23 3.64 0 2.2 5.1 7
## ORBPer_t3 0 988 988 5.55 3.65 0 2.8 5.53 6.3
## OWS_t1 0 988 988 1.67 2.15 -3.3 0.2 1.1 2.42
## OWS_t2 0 988 988 1.92 2.03 -3.3 0.6 1.9 2.4
## OWS_t3 0 988 988 2.12 2 -2.7 1 2.09 2.3
## PER_t1 0 988 988 14.13 5.05 -11.4 11 13.8 16.8
## PER_t2 0 988 988 14.47 4.32 -11.4 12.2 14.47 16.22
## PER_t3 0 988 988 14.79 3.84 -7.5 13 14.77 15.9
## PF_t1 0 988 988 1.86 0.7 0 1.4 1.9 2.3
## PF_t2 0 988 988 1.96 0.6 0 1.6 1.96 2.3
## PF_t3 0 988 988 2 0.56 0 1.8 1.99 2.3
## STL_t1 0 988 988 0.72 0.43 0 0.4 0.6 1
## STL_t2 0 988 988 0.77 0.41 0 0.5 0.76 0.9
## STL_t3 0 988 988 0.79 0.39 0 0.6 0.78 0.9
## STLPer_t1 0 988 988 1.61 0.76 0 1.2 1.5 2
## STLPer_t2 0 988 988 1.63 0.66 0 1.2 1.61 1.9
## STLPer_t3 0 988 988 1.64 0.6 0 1.3 1.62 1.8
## TOV_t1 0 988 988 1.25 0.8 0 0.7 1.1 1.6
## TOV_t2 0 988 988 1.37 0.74 0 0.9 1.36 1.6
## TOV_t3 0 988 988 1.44 0.68 0 1 1.43 1.6
## TOVPer_t1 0 988 988 12.79 4.27 0 10 12.6 15.2
## TOVPer_t2 0 988 988 13.07 3.58 0 10.8 13.07 14.8
## TOVPer_t3 0 988 988 13.21 3.2 0 11.67 13.23 14.1
## TRBPer_t3 0 988 988 10.25 3.97 0 7.3 10.26 11.6
## TS_t1 0 988 988 0.53 0.069 0 0.51 0.54 0.57
## TS_t2 0 988 988 0.53 0.055 0 0.51 0.53 0.56
## TS_t3 0 988 988 0.53 0.045 0.26 0.52 0.53 0.55
## USG_t1 0 988 988 19.11 5.32 0 15.3 18.5 22.2
## USG_t2 0 988 988 19.42 4.56 0 16.5 19.42 21.72
## USG_t3 0 988 988 19.86 4.17 8.6 17.7 19.82 21.4
## VORP_t1 0 988 988 0.81 1.48 -1.4 -0.1 0.3 1.3
## VORP_t2 0 988 988 0.95 1.36 -1.4 0.075 0.9 1.3
## VORP_t3 0 988 988 1.09 1.32 -1.6 0.27 1.05 1.2
## WS_t2 0 988 988 3.53 2.78 -2.1 1.6 3.53 4.4
## WS_t3 0 988 988 3.86 2.7 -1.5 2.3 3.8 4.4
## WS48_t1 0 988 988 0.093 0.068 -0.3 0.055 0.091 0.13
## WS48_t2 0 988 988 0.096 0.059 -0.3 0.069 0.096 0.12
## WS48_t3 0 988 988 0.099 0.053 -0.29 0.082 0.098 0.12
## p100 hist
## 38 ▃▇▅▆▃▁▁▁
## 1 ▇▅▆▆▃▁▁▁
## 1 ▆▃▇▃▂▁▁▁
## 0.87 ▅▂▇▂▂▁▁▁
## 11.2 ▇▅▂▁▁▁▁▁
## 11.7 ▇▇▂▁▁▁▁▁
## 11.1 ▅▇▂▁▁▁▁▁
## 57.3 ▅▇▃▂▁▁▁▁
## 52.7 ▃▇▇▂▂▁▁▁
## 49.3 ▂▅▇▁▁▁▁▁
## 3.7 ▇▃▁▁▁▁▁▁
## 3.7 ▇▆▁▁▁▁▁▁
## 3 ▆▇▁▁▁▁▁▁
## 15.1 ▇▂▁▁▁▁▁▁
## 15.1 ▇▂▁▁▁▁▁▁
## 9.3 ▆▇▁▁▁▁▁▁
## 15.6 ▁▁▁▃▇▂▁▁
## 12.5 ▁▁▁▂▇▃▁▁
## 11.6 ▁▁▁▁▇▃▁▁
## 12.1 ▁▁▂▇▃▁▁▁
## 5.8 ▁▁▁▃▇▃▁▁
## 5.5 ▁▁▁▁▂▇▂▁
## 10.3 ▃▇▆▃▂▁▁▁
## 10.3 ▂▅▇▂▁▁▁▁
## 10.1 ▁▃▇▂▁▁▁▁
## 100 ▆▇▁▁▁▁▁▁
## 45.1 ▁▅▇▃▂▁▁▁
## 45.1 ▁▃▇▂▁▁▁▁
## 6 ▇▇▅▅▂▁▁▁
## 6.6 ▅▅▇▂▁▁▁▁
## 6.6 ▃▃▇▂▁▁▁▁
## 1 ▁▁▁▇▇▁▁▁
## 0.74 ▁▁▁▁▂▇▁▁
## 0.71 ▁▁▁▂▇▂▁▁
## 1 ▁▁▁▇▅▁▁▁
## 0.74 ▁▁▁▁▃▇▁▁
## 0.86 ▁▁▁▁▇▁▁▁
## 5.1 ▇▃▃▁▁▁▁▁
## 5.1 ▇▇▃▁▁▁▁▁
## 3.6 ▆▇▂▂▁▁▁▁
## 1 ▂▁▇▃▁▁▁▁
## 1 ▁▁▇▂▁▁▁▁
## 1 ▁▁▇▂▁▁▁▁
## 24 ▃▇▆▃▃▂▁▁
## 22 ▂▅▇▃▂▂▁▁
## 22.2 ▂▃▇▂▂▁▁▁
## 18.6 ▅▇▆▃▂▁▁▁
## 20.4 ▃▆▇▃▂▁▁▁
## 20.4 ▂▃▇▂▁▁▁▁
## 1 ▁▁▁▇▂▁▁▁
## 0.74 ▁▁▁▁▇▃▁▁
## 0.71 ▁▁▂▇▂▁▁▁
## 9.2 ▇▅▂▁▁▁▁▁
## 8.8 ▆▇▂▁▁▁▁▁
## 8.8 ▃▇▂▁▁▁▁▁
## 1 ▁▁▁▁▂▆▇▂
## 1 ▁▁▁▁▂▇▇▁
## 1 ▁▁▁▁▁▇▆▁
## 1.5 ▅▇▂▁▁▁▁▁
## 1.22 ▂▇▃▁▁▁▁▁
## 0.94 ▁▃▇▂▁▁▁▁
## 83 ▁▁▁▂▂▂▅▇
## 82 ▁▁▁▁▁▂▇▇
## 83 ▁▁▁▁▁▂▇▆
## 82 ▇▂▂▂▁▁▂▃
## 82 ▇▂▂▆▁▂▂▅
## 82 ▅▂▁▇▁▁▂▃
## 42 ▁▃▆▇▇▇▆▁
## 38.7 ▁▂▂▃▇▃▅▂
## 38.7 ▁▁▂▂▇▂▂▂
## 12.4 ▁▁▁▃▇▂▁▁
## 12.4 ▁▁▁▃▇▁▁▁
## 9.6 ▁▁▁▁▇▃▁▁
## 5.3 ▇▅▂▁▁▁▁▁
## 5.4 ▆▇▂▁▁▁▁▁
## 5.5 ▅▇▂▁▁▁▁▁
## 21.9 ▇▅▂▂▂▁▁▁
## 21.9 ▇▇▂▂▂▁▁▁
## 46.8 ▇▂▁▁▁▁▁▁
## 13.8 ▁▇▆▂▁▁▁▁
## 14.8 ▁▆▇▂▁▁▁▁
## 14.8 ▁▅▇▂▁▁▁▁
## 40.8 ▁▁▁▇▆▁▁▁
## 31.5 ▁▁▁▂▇▃▁▁
## 31.6 ▁▁▁▂▇▂▁▁
## 6 ▁▅▇▆▁▁▁▁
## 4.1 ▁▁▂▇▅▂▁▁
## 4.1 ▁▁▂▇▃▂▁▁
## 2.3 ▃▇▇▅▂▁▁▁
## 2.5 ▃▅▇▂▁▁▁▁
## 2.5 ▂▃▇▂▁▁▁▁
## 11.1 ▆▇▁▁▁▁▁▁
## 8.2 ▂▇▂▁▁▁▁▁
## 7.9 ▁▇▂▁▁▁▁▁
## 5.7 ▆▇▃▂▁▁▁▁
## 5.7 ▃▇▂▂▁▁▁▁
## 4.4 ▁▃▇▂▁▁▁▁
## 43.6 ▁▆▇▂▁▁▁▁
## 29.2 ▁▁▃▇▃▁▁▁
## 37.5 ▁▁▇▂▁▁▁▁
## 27.6 ▁▃▇▂▂▁▁▁
## 1 ▁▁▁▂▇▁▁▁
## 0.73 ▁▁▁▁▁▇▅▁
## 0.7 ▁▁▁▁▃▇▁▁
## 41.7 ▁▁▅▇▅▂▁▁
## 38.4 ▁▁▂▆▇▂▁▁
## 38.4 ▁▂▃▇▂▁▁▁
## 12.4 ▇▆▁▁▁▁▁▁
## 9.8 ▃▇▂▁▁▁▁▁
## 9.8 ▂▇▂▁▁▁▁▁
## 19.2 ▂▆▇▂▁▁▁▁
## 19.3 ▂▅▇▂▁▁▁▁
## 0.43 ▁▁▁▅▇▂▁▁
## 0.32 ▁▁▁▁▅▇▁▁
## 0.32 ▁▁▁▁▃▇▁▁
# Random forest tuning grid: only mtry varies; the split rule and the minimum
# node size are held at a single value each.
rfGrid <- expand.grid(
  # candidates in steps of 2, from 5 below to 5 above one third of
  # length(x_mean_train)
  mtry = round(length(x_mean_train) / 3) + seq(-5, 5, by = 2),
  # single-candidate alternative: mtry = round(length(x_mean_train) / 3),
  splitrule = "variance",
  min.node.size = 5
)
# Fit the random forest through caret's train() with the ranger engine.
# The RNG seed is fixed immediately before training.
set.seed(1234)
rf <- train(
  x = x_mean_train,
  y = y_train,
  method = "ranger",
  # candidate hyperparameters, evaluated by cross-validation
  tuneGrid = rfGrid,
  trControl = trainControl(method = "cv"),
  # extra arguments passed on with the "ranger" method
  num.trees = 500,
  importance = "permutation"
)
# Plot the fitted train object (performance across the tuning grid)
plot(rf)
# Predicted outcome for every row of the test feature set
rfPred <- predict(rf, x_mean_test)

# Test-set comparison table: identifiers, observed outcome (y), prediction
# (y_hat) and signed error (prediction minus observed). Factor columns are
# converted to character at the end.
predictions <- data.frame(
  Player = df_pred[["Player"]],
  Year = df_pred[["Year_t1"]] + 1,
  Pos = df_pred[["Pos_t1"]],
  y = y_test,
  y_hat = rfPred
)
predictions <- predictions %>%
  mutate(error = y_hat - y) %>%
  mutate_if(is.factor, as.character)

# Equivalent table for the training rows; predict() is called without new
# data here. Factor columns are left as they are.
predictions_training <- local({
  # subset the training rows once and reuse them for all identifier columns
  train_rows <- df[trainingRow, ]
  data.frame(
    Player = train_rows[["Player"]],
    Year = train_rows[["Year_t1"]] + 1,
    Pos = train_rows[["Pos_t1"]],
    y = y_train,
    y_hat = predict(rf)
  ) %>%
    mutate(error = y_hat - y)
})
The test data has 173 observations with mean y of 0.76 and standard deviation of 1.43. The RMSE is 0.7388318; the MAE is 0.4995466.
The training data has 988 observations with mean y of 0.77 and standard deviation of 1.46.
# Round the numeric result columns to two decimals, then render the table
# with datatable() with horizontal scrolling enabled
predictions_2019 %>%
  mutate_at(
    vars(
      vorp,
      Wins_pred_2019,
      Error_Pred_Vegas,
      Error_Pelton_Vegas,
      Sum_Error
    ),
    round,
    digits = 2
  ) %>%
  datatable(options = list(scrollX = TRUE))